# LOAD NECESSARY PACKAGES ------------------------------------------------------

library(pacman)

# tidyverse
p_load(dplyr, tidyr, stringr, ggplot2, forcats, srvyr, purrr, tidylog)

# misc
p_load(here, patchwork, readstata13, naniar, WDI)

# custom 
# sum with na.rm = T unless ALL NA
suma = function(x) if (all(is.na(x))) x[NA_integer_] else sum(x, na.rm = TRUE)

# HOUSEHOLD LONGITUDINAL DATASET -----------------------------------------------

# Household tracking -----------------------------------------------------------

eth_track_2011 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect_cover_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  transmute(
    hhid = household_id,
    ea = ea_id,
    year_weights = pw,
    rural = as.numeric(rural == 1),
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    hhid = paste0(ea, hhid),
    hhid2 = household_id,
    year = 2011)

eth_track_2013 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect_cover_hh_w2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # Remove households that split
  mutate(hhid = ifelse(household_id == "", household_id2, household_id)) %>% 
  group_by(hhid) %>% 
  mutate(
    nobs = length(household_id)) %>% 
  filter(nobs == 1) %>% 
  select(-nobs) %>% 
  ungroup() %>% 
  transmute(
    hhid = ifelse(household_id == "", household_id2, household_id),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    year_weights = pw2,
    rural = as.numeric(rural == 1),
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    hhid = paste0(ea, hhid),
    hhid2 = household_id2,
    year = 2013)

eth_track_2015 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect_cover_hh_w3.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # Remove households that split
  mutate(
    hhid = ifelse(household_id == "", household_id2, household_id)) %>% 
  group_by(hhid) %>% 
  mutate(
    nobs = length(household_id)) %>% 
  filter(nobs == 1) %>% 
  select(-nobs) %>% 
  ungroup() %>% 
  transmute(
    hhid = ifelse(household_id == "", household_id2, household_id),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    year_weights = pw_w3,
    rural = as.numeric(rural == 1),
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,

    hhid = paste0(ea, hhid),
    hhid2 = household_id2,
    year = 2015) 

# Pooled
eth_pool_1admin <- do.call("rbind", list(eth_track_2011, eth_track_2013, eth_track_2015))
rm(eth_track_2011, eth_track_2013, eth_track_2015)

# Household demographics -------------------------------------------------------
eth_2011_sec1 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect1_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id)) %>% 
  mutate(
    year = 2011,
    hh_role = hh_s1q02,
    above15 = as.numeric(hh_s1q04_a > 15),
    adultage = ifelse(above15 == 1, hh_s1q04_a, NA),
    adultgender = ifelse(above15 == 1, hh_s1q03, NA)) %>% 
  group_by(year, hhid) %>% 
  summarise(
    hh_size = sum(above15 == 1, na.rm = T),
    hh_avgage = mean(adultage, na.rm = T),
    hh_avgmale = mean(adultgender == "1", na.rm = T),
    hh_headmarr = sum(hh_role == 1 & hh_s1q08 %in% c(2,3))) %>% 
  ungroup() %>% 
  select(year, hhid, hh_size, hh_avgage, hh_avgmale, hh_headmarr) %>% 
  ungroup() 

indiv_2011 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect1_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id)) %>% 
  transmute(
    hhid = hhid,
    indiv = paste0(hhid,"_",individual_id),
    above15 = as.numeric(hh_s1q04_a > 15),
    above15 = ifelse(is.na(above15), 0, above15),
    female = case_when(
      hh_s1q03 == 2 ~ 1,
      hh_s1q03 == 1 ~ 0,
      TRUE ~ NA_real_)) %>% 
  # remove split households
  filter(hhid %in% (eth_pool_1admin %>% filter(year == 2011) %>% pull(hhid)))

eth_2013_sec1 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect1_hh_w2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),

    hhid = paste0(ea, hhid)) %>% 
  mutate(
    year = 2013,
    hh_role = hh_s1q02,
    above15 = as.numeric(hh_s1q04_a > 15),
    # still a member of the household OR a new household member (NA)
    stillmember = as.numeric(hh_s1q04c == 1 | is.na(hh_s1q04c)),
    adultage = ifelse(above15 == 1, hh_s1q04_a, NA),
    adultgender = ifelse(above15 == 1, hh_s1q03, NA)) %>% 
  group_by(year, hhid) %>% 
  summarise(
    hh_size = sum(above15 == 1 & stillmember == 1, na.rm = T),
    hh_avgage = mean(adultage, na.rm = T),
    hh_avgmale = mean(adultgender == "1" & stillmember == 1, na.rm = T),
    hh_headmarr = sum(hh_role == 1 & hh_s1q08 %in% c(2,3))) %>% 
  ungroup() %>% 
  select(year, hhid, hh_size, hh_avgage, hh_avgmale, hh_headmarr)

indiv_2013 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect1_hh_w2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>%
  transmute(
    hhid = hhid,
    indiv = paste0(hhid,"_",individual_id),
    above15 = as.numeric(hh_s1q04_a > 15),
    above15 = ifelse(is.na(above15), 0, above15),
    female = case_when(
      hh_s1q03 == 2 ~ 1,
      hh_s1q03 == 1 ~ 0,
      TRUE ~ NA_real_),
    # still a member of the household OR a new household member (NA)
    stillmember = as.numeric(hh_s1q04c == 1 | is.na(hh_s1q04c)))

eth_2015_sec1 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect1_hh_w3.dta"), 
  generate.factors = F, convert.factors = F)  %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),

    hhid = paste0(ea, hhid)) %>% 
  mutate(
    year = 2015,
    hh_role = hh_s1q02,
    above15 = as.numeric(hh_s1q04a > 15),
    # still a member of the household OR a new household member (NA)
    stillmember = as.numeric(hh_s1q04c == 1 | is.na(hh_s1q04c)),
    adultage = ifelse(above15 == 1, hh_s1q04a, NA),
    adultgender = ifelse(above15 == 1, hh_s1q03, NA)) %>% 
  group_by(year, hhid) %>% 
  summarise(
    hh_size = sum(above15 == 1 & stillmember == 1, na.rm = T),
    hh_avgage = mean(adultage, na.rm = T),
    hh_avgmale = mean(adultgender == "1" & stillmember == 1, na.rm = T),
    hh_headmarr = sum(hh_role == 1 & hh_s1q08 %in% c(2,3))) %>% 
  ungroup() %>% 
  select(year, hhid, hh_size, hh_avgage, hh_avgmale, hh_headmarr) 

indiv_2015 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect1_hh_w3.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>%
  transmute(
    hhid = hhid,
    indiv = paste0(hhid,"_",individual_id),
    above15 = as.numeric(hh_s1q04a > 15),
    above15 = ifelse(is.na(above15), 0, above15),
    female = case_when(
      hh_s1q03 == 2 ~ 1,
      hh_s1q03 == 1 ~ 0,
      TRUE ~ NA_real_),
    # still a member of the household OR a new household member (NA)
    stillmember = as.numeric(hh_s1q04c == 1 | is.na(hh_s1q04c)))

# Pooled
eth_pool_2dem <- do.call("rbind", list(eth_2011_sec1, eth_2013_sec1, eth_2015_sec1))
rm(eth_2011_sec1, eth_2013_sec1, eth_2015_sec1)

# Household highest years of education -----------------------------------------
eth_2011_sec2 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect2_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id)) %>% 
  left_join(indiv_2011) %>% 
  group_by(hhid) %>% 
  mutate(
    hh_s2q05 = ifelse(hh_s2q03 == 2 | is.na(hh_s2q03),0,hh_s2q05)) %>% 
  summarise(hh_educ = max(hh_s2q05, na.rm = T)) %>% 
  mutate(
    year = 2011,
    hh_educ = ifelse(is.infinite(hh_educ), NA_real_, hh_educ),
    hh_educ = case_when(
      hh_educ %in% c(98) ~ 0,
      hh_educ %in% c(93, 94, 95, 96) ~ 5,
      hh_educ %in% c(9, 21) ~ 9,
      hh_educ %in% c(10, 22) ~ 10,
      hh_educ %in% c(11, 23) ~ 11,
      hh_educ %in% c(12, 24) ~ 12,
      hh_educ %in% c(13, 14, 15, 16, 25, 26, 27, 28, 29) ~ 13,
      hh_educ %in% c(17, 30) ~ 15,
      hh_educ %in% c(18, 31, 32, 33) ~ 16,
      hh_educ %in% c(19, 34) ~ 17,
      hh_educ %in% c(20, 35) ~ 18,
      TRUE ~ hh_educ)) %>%
  ungroup()

eth_2013_sec2 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect2_hh_w2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>% 
  left_join(indiv_2013) %>% 
  group_by(hhid) %>% 
  mutate(
    hh_s2q05 = ifelse(hh_s2q03 == 2,0,hh_s2q05)) %>% 
  summarise(hh_educ = max(hh_s2q05, na.rm = T)) %>% 
  mutate(
    year = 2013,
    hh_educ = ifelse(is.infinite(hh_educ), NA_real_, hh_educ),
    hh_educ = case_when(
      hh_educ %in% c(98) ~ 0,
      hh_educ %in% c(93, 94, 95, 96) ~ 5,
      hh_educ %in% c(9, 21) ~ 9,
      hh_educ %in% c(10, 22) ~ 10,
      hh_educ %in% c(11, 23) ~ 11,
      hh_educ %in% c(12, 24) ~ 12,
      hh_educ %in% c(13, 14, 15, 16, 25, 26, 27, 28, 29) ~ 13,
      hh_educ %in% c(17, 30) ~ 15,
      hh_educ %in% c(18, 31, 32, 33) ~ 16,
      hh_educ %in% c(19, 34) ~ 17,
      hh_educ %in% c(20, 35) ~ 18,
      hh_educ %in% c(76) ~ NA_real_,
      TRUE ~ hh_educ)) %>%
  ungroup()

eth_2015_sec2 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect2_hh_w3.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>% 
  left_join(indiv_2015) %>% 
  group_by(hhid) %>% 
  mutate(
    hh_s2q05 = ifelse(hh_s2q03 == 2,0,hh_s2q05)) %>% 
  summarise(hh_educ = max(hh_s2q05, na.rm = T)) %>% 
  mutate(
    year = 2015,
    hh_educ = ifelse(is.infinite(hh_educ), NA, hh_educ),
    hh_educ = case_when(
      hh_educ %in% c(98) ~ 0,
      hh_educ %in% c(93, 94, 95, 96) ~ 5,
      hh_educ %in% c(9, 21) ~ 9,
      hh_educ %in% c(10, 22) ~ 10,
      hh_educ %in% c(11, 23) ~ 11,
      hh_educ %in% c(12, 24) ~ 12,
      hh_educ %in% c(13, 14, 15, 16, 25, 26, 27, 28, 29) ~ 13,
      hh_educ %in% c(17, 30) ~ 15,
      hh_educ %in% c(18, 31, 32, 33) ~ 16,
      hh_educ %in% c(19, 34) ~ 17,
      hh_educ %in% c(20, 35) ~ 18,
      TRUE ~ hh_educ)) %>%
  ungroup()

# Pooled
eth_pool_3educ <- do.call("rbind", list(eth_2011_sec2, eth_2013_sec2, eth_2015_sec2))
rm(eth_2011_sec2, eth_2013_sec2, eth_2015_sec2)

# Household assets -------------------------------------------------------------

assets <- tibble::tribble(
  ~item_cd,       ~asset,
  5,       "asset_Mattress",
  8,      "asset_MobilePhone",
  10,     "asset_TV",
  13,     "asset_Sofa",
  14,     "asset_Bicycle",
  15,   "asset_Motorbike",
  22,     "asset_Fridge",
  23,        "asset_Car",
  
  
)

eth_2011_sec10 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect10_hh_w1.dta"), 
  generate.factors = F, convert.factors = F
  ) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id)) %>%  
  filter(hh_s10q00 %in% assets$item_cd) %>% 
  left_join(assets, by = c("hh_s10q00" = "item_cd")) %>% 
  select(hhid, hh_s10q01, asset) %>% 
  mutate(
    hh_s10q01 = as.numeric(hh_s10q01 > 0)) %>% 
  pivot_wider(names_from = asset, values_from = hh_s10q01) %>% 
  mutate(year = 2011)

eth_2013_sec10 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect10_hh_w2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>%
  filter(hh_s10q00 %in% assets$item_cd) %>% 
  left_join(assets, by = c("hh_s10q00" = "item_cd")) %>% 
  select(hhid, hh_s10q01, asset) %>% 
  mutate(
    hh_s10q01 = as.numeric(hh_s10q01 > 0)) %>% 
  pivot_wider(names_from = asset, values_from = hh_s10q01) %>% 
  mutate(
    year = 2013)

eth_2015_sec10 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect10_hh_w3.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>% 
  filter(hh_s10q00 %in% assets$item_cd) %>% 
  left_join(assets, by = c("hh_s10q00" = "item_cd")) %>% 
  select(hhid, hh_s10q01, asset) %>% 
  mutate(
    hh_s10q01 = as.numeric(hh_s10q01 > 0)) %>% 
  pivot_wider(names_from = asset, values_from = hh_s10q01) %>% 
  mutate(
    year = 2015)

# Panel
eth_pool_5asset <- do.call("rbind", list(eth_2011_sec10, eth_2013_sec10, eth_2015_sec10))
rm(eth_2011_sec10, eth_2013_sec10, eth_2015_sec10, assets)

# Household electricity source -------------------------------------------------

eth_2011_sec9 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect9_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id)) %>% 
  transmute(
    hhid = hhid,
    year = 2011,
    el_grid = as.numeric(hh_s9q19 %in% c(1,2)),
    el_dgen = as.numeric(hh_s9q19 %in% c(3)),
    hh_rooms = hh_s9q04) %>% 
  ungroup()

eth_2013_sec9 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect9_hh_w2.dta"), 
  generate.factors = F, convert.factors = F)  %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>%
  transmute(
    hhid = hhid,
    year = 2013,
    el_grid = as.numeric(hh_s9q19_a %in% c(1,2)),
    el_dgen = as.numeric(hh_s9q19_a %in% c(3)),
    hh_rooms = hh_s9q04) %>% 
  ungroup()

eth_2015_sec9 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect9_hh_w3.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>% 
  transmute(
    hhid = hhid,
    year = 2015,
    el_grid = as.numeric(hh_s9q19_a %in% c(1,2)),
    el_dgen = as.numeric(hh_s9q19_a %in% c(3)),
    hh_rooms = hh_s9q04) %>% 
  ungroup()

# Panel
eth_pool_6elec <- do.call("rbind", list(eth_2011_sec9, eth_2013_sec9, eth_2015_sec9))
rm(eth_2011_sec9, eth_2013_sec9, eth_2015_sec9)

# Household non-farm entrepreneurship ------------------------------------------

gender_2011 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect1_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id)) %>% 
  transmute(
    hhid = hhid,
    female = as.numeric(hh_s1q03 == 2),
    female = ifelse(is.na(female), 0, female)) %>% 
  group_by(hhid) %>% 
  summarise(female_vec = paste(female, collapse = ''))

eth_2011_sec11 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect11b_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id)) %>% 
  left_join(gender_2011) %>% 
  group_by(hhid) %>% 
  transmute(
    # Any enterprise not permanently closed (i.e. had some revenue past 12 months)
    any = as.numeric(!is.na(hh_s11bq13) & hh_s11bq13 > 0),
    # Retail enterprises not permanently closed
    retail = as.numeric(hh_s11bq01_b == 7 & any == 1),
    # Manufacturing enterprises not permanently closed
    manuf = as.numeric(hh_s11bq01_b == 4 & any == 1),
    # NFE estimated annual revenues 
    annrev = as.numeric(hh_s11bq09 * hh_s11bq13),
    firstowner = hh_s11bq03_a,
    secondowner = hh_s11bq03_b,
    nfe_wmn = case_when(
      substr(female_vec,firstowner,firstowner) == 1 |
        substr(female_vec,secondowner,secondowner) == 1 ~ 1,
      TRUE ~ 0)
  ) %>% 
  group_by(hhid) %>% 
  # Determine total NFEs (+ retail / manuf) with at least some reported annual revenue
  summarise(nfe_tot = sum(annrev > 0),
            nfe_retail = sum((retail*annrev) > 0),
            nfe_manuf = sum((manuf*annrev) > 0),
            nfe_wmntot = sum(any == 1 & nfe_wmn == 1 & annrev > 0),
            nfe_wmnretail = sum(as.numeric(retail == 1 & nfe_wmn == 1 & annrev > 0)),
            nfe_wmnmanuf = sum(as.numeric(manuf == 1 & nfe_wmn == 1 & annrev > 0))
  ) %>% 
  mutate(year = 2011) %>% 
  ungroup()

gender_2013 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect1_hh_w2.dta"), 
  generate.factors = F, convert.factors = F)  %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>%
  transmute(
    hhid = hhid,
    female = as.numeric(hh_s1q03 == 2),
    female = ifelse(is.na(female), 0, female)) %>% 
  group_by(hhid) %>% 
  summarise(female_vec = paste(female, collapse = ''))

eth_2013_sec11 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect11b_hh_w2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>%
  left_join(gender_2013) %>% 
  group_by(hhid) %>% 
  transmute(
    # Any enterprise not permanently closed (i.e. had some revenue past 12 months)
    any = as.numeric(!is.na(hh_s11bq13) & hh_s11bq13 > 0),
    # Retail enterprises not permanently closed
    retail = as.numeric(hh_s11bq01_b == 7 & any == 1),
    # Manufacturing enterprises not permanently closed
    manuf = as.numeric(hh_s11bq01_b == 4 & any == 1),
    # NFE estimated annual revenues 
    annrev = as.numeric(hh_s11bq09 * hh_s11bq13),
    firstowner = hh_s11bq03_a,
    secondowner = hh_s11bq03_b,
    nfe_wmn = case_when(
      substr(female_vec,firstowner,firstowner) == 1 |
        substr(female_vec,secondowner,secondowner) == 1 ~ 1,
      TRUE ~ 0)
  ) %>% 
  group_by(hhid) %>% 
  # Determine total NFEs (+ retail / manuf) with at least some reported annual revenue
  summarise(nfe_tot = sum(annrev > 0),
            nfe_retail = sum((retail*annrev) > 0),
            nfe_manuf = sum((manuf*annrev) > 0),
            nfe_wmntot = sum(any == 1 & nfe_wmn == 1 & annrev > 0),
            nfe_wmnretail = sum(as.numeric(retail == 1 & nfe_wmn == 1 & annrev > 0)),
            nfe_wmnmanuf = sum(as.numeric(manuf == 1 & nfe_wmn == 1 & annrev > 0))
  ) %>% 
  mutate(year = 2013) %>% 
  ungroup()

gender_2015 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect1_hh_w3.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>%  
  transmute(
    hhid = hhid,
    female = as.numeric(hh_s1q03 == 2),
    female = ifelse(is.na(female), 0, female)) %>% 
  group_by(hhid) %>% 
  summarise(female_vec = paste(female, collapse = ''))

eth_2015_sec11 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect11b_hh_w3.dta"), 
  generate.factors = F, convert.factors = F)  %>% 
  # remove split households and all NA observations
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2)),
         !is.na(saq01)) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>%  
  left_join(gender_2015) %>%  
  group_by(hhid) %>% 
  transmute(
    # Any enterprise not permanently closed (i.e. had some revenue past 12 months)
    any = as.numeric(!is.na(hh_s11bq13) & hh_s11bq13 > 0),
    # Retail enterprises not permanently closed
    retail = as.numeric(hh_s11bq01_b == 7 & any == 1),
    # Manufacturing enterprises not permanently closed
    manuf = as.numeric(hh_s11bq01_b == 4 & any == 1),
    # NFE estimated annual revenues 
    annrev = as.numeric(hh_s11bq09 * hh_s11bq13),
    firstowner = hh_s11bq03_a,
    secondowner = hh_s11bq03_b,
    nfe_wmn = case_when(
      substr(female_vec,firstowner,firstowner) == 1 |
        substr(female_vec,secondowner,secondowner) == 1 ~ 1,
      TRUE ~ 0)
  ) %>% 
  group_by(hhid) %>% 
  # Determine total NFEs (+ retail / manuf) with at least some reported annual revenue
  summarise(nfe_tot = sum(annrev > 0),
            nfe_retail = sum((retail*annrev) > 0),
            nfe_manuf = sum((manuf*annrev) > 0),
            nfe_wmntot = sum(any == 1 & nfe_wmn == 1 & annrev > 0),
            nfe_wmnretail = sum(as.numeric(retail == 1 & nfe_wmn == 1 & annrev > 0)),
            nfe_wmnmanuf = sum(as.numeric(manuf == 1 & nfe_wmn == 1 & annrev > 0))
  ) %>% 
  mutate(year = 2015) %>% 
  ungroup()

# Panel
eth_pool_7nfe <- do.call("rbind", list(eth_2011_sec11, eth_2013_sec11, eth_2015_sec11))
rm(eth_2011_sec11, eth_2013_sec11, eth_2015_sec11, gender_2011, gender_2013, gender_2015)

# Household geovariables -------------------------------------------------------

eth_2011_hh_geo <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "Pub_ETH_HouseholdGeovariables_Y1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  transmute(
    hhid2 = household_id,
    year = 2011,
    dist_road = dist_road,
    dist_mkt = dist_market,
    dist_popcenter = dist_popcenter,
    dist_admctr = dist_admctr,
    pct_agr = fsrad3_agpct,
    latitude = LAT_DD_MOD,
    longitude = LON_DD_MOD)

eth_2013_hh_geo <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "Pub_ETH_HouseholdGeovars_Y2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  transmute(
    hhid2 = household_id2,
    year = 2013,
    dist_road = dist_road,
    dist_mkt = dist_market,
    dist_popcenter = dist_popcenter,
    dist_admctr = dist_admctr,
    pct_agr = fsrad3_agpct,
    latitude = lat_dd_mod,
    longitude = lon_dd_mod) %>% 
  # remove split households
  filter(hhid2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2)))

eth_2015_hh_geo <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Geovariables", "ETH_HouseholdGeovars_y3.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  transmute(
    hhid2 = household_id2,
    year = 2015,
    dist_road = dist_road,
    dist_mkt = dist_market,
    dist_popcenter = dist_popcenter,
    dist_admctr = dist_admctr,
    pct_agr = fsrad3_agpct,
    latitude = lat_dd_mod,
    longitude = lon_dd_mod) %>% 
  # remove split households
  filter(hhid2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2)))

# Cross-section
eth_pool_8geo <- do.call("rbind", list(eth_2011_hh_geo, eth_2013_hh_geo,
                                       eth_2015_hh_geo))
rm(eth_2011_hh_geo, eth_2013_hh_geo,eth_2015_hh_geo)

# HOUSEHOLD LONGITUDINAL DATASET OUTPUT ----------------------------------------

eth_hh_panel <- list(eth_pool_1admin, eth_pool_2dem, eth_pool_3educ,
                     eth_pool_5asset, eth_pool_6elec,
                     eth_pool_7nfe, eth_pool_8geo) %>% 
  reduce(left_join) %>% 
  select(-hhid2) %>% 
  ungroup() %>% 
  # Remove households were lon-lat or rurality changes between first and last wave
  group_by(ea) %>% 
  filter(longitude == median(longitude) & latitude == median(latitude),
         rural == median(rural))

# Calculate final variables and summarise
eth_hh_panel <- eth_hh_panel %>% 
  # NFE NA values simply artefact of joining (see nfe tibble rows)
  mutate(across(matches("nfe"), ~ifelse(is.na(.),0,.))) %>% 
  ungroup() %>% 
  select(year, rural, admin1, admin2, admin3, ea, hhid, matches("el"), matches("hh_emp"), 
         matches("nfe"), everything()) %>% 
  ungroup() %>% 
  group_by(year, ea) %>% 
  mutate(com_grid = as.numeric(mean(el_grid) >= 0.5)) %>% 
  ungroup()

saveRDS(eth_hh_panel, here::here("Data", "ethiopia_hhpanel.rds"))

# INDIVUDUAL LONGITUDINAL DATASET ----------------------------------------------

# Individual demographics ------------------------------------------------------

eth_indiv_2011_sec1 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect1_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id)) %>% 
  filter(hh_s1q04_a > 15) %>% 
  transmute(
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    year = 2011,
    head = as.numeric(hh_s1q02 == 1),
    age = hh_s1q04_a,
    sexfem = as.numeric(hh_s1q03 == 2),
    mar = as.numeric(hh_s1q08 %in% c(2,3)))

eth_indiv_2013_sec1 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect1_hh_w2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>% 
  filter(hh_s1q04_a > 15,
         hh_s1q04c == 1 | is.na(hh_s1q04c)) %>% 
  transmute(
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    year = 2013,
    head = as.numeric(hh_s1q02 == 1),
    age = hh_s1q04_a,
    sexfem = as.numeric(hh_s1q03 == 2),
    mar = as.numeric(hh_s1q08 %in% c(2,3)))

eth_indiv_2015_sec1 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect1_hh_w3.dta"), 
  generate.factors = F, convert.factors = F)  %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid)) %>% 
  filter(hh_s1q04a > 15,
         hh_s1q04c == 1 | is.na(hh_s1q04c)) %>% 
  transmute(
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    year = 2015,
    head = as.numeric(hh_s1q02 == 1),
    age = hh_s1q04a,
    sexfem = as.numeric(hh_s1q03 == 2),
    mar = as.numeric(hh_s1q08 %in% c(2,3)))

# Pooled
eth_indiv_pool_2dem <- do.call("rbind", list(eth_indiv_2011_sec1, eth_indiv_2013_sec1, eth_indiv_2015_sec1))
rm(eth_indiv_2011_sec1, eth_indiv_2013_sec1, eth_indiv_2015_sec1)

# Individual years of education ------------------------------------------------

eth_indiv_2011_sec2 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect2_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id),
    educ = ifelse(hh_s2q03 == 2 | is.na(hh_s2q03),0,hh_s2q05)) %>% 
  transmute(
    year = 2011,
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    educ = ifelse(is.infinite(educ), NA_real_, educ),
    educ = case_when(
      educ %in% c(98) ~ 0,
      educ %in% c(93, 94, 95, 96) ~ 5,
      educ %in% c(9, 21) ~ 9,
      educ %in% c(10, 22) ~ 10,
      educ %in% c(11, 23) ~ 11,
      educ %in% c(12, 24) ~ 12,
      educ %in% c(13, 14, 15, 16, 25, 26, 27, 28, 29) ~ 13,
      educ %in% c(17, 30) ~ 15,
      educ %in% c(18, 31, 32, 33) ~ 16,
      educ %in% c(19, 34) ~ 17,
      educ %in% c(20, 35) ~ 18,
      TRUE ~ educ)) 

eth_indiv_2013_sec2 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect2_hh_w2.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, household_id),
    educ = ifelse(hh_s2q03 == 2 | is.na(hh_s2q03),0,hh_s2q05)) %>% 
  transmute(
    year = 2013,
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    educ = case_when(
      educ %in% c(98) ~ 0,
      educ %in% c(93, 94, 95, 96) ~ 5,
      educ %in% c(9, 21) ~ 9,
      educ %in% c(10, 22) ~ 10,
      educ %in% c(11, 23) ~ 11,
      educ %in% c(12, 24) ~ 12,
      educ %in% c(13, 14, 15, 16, 25, 26, 27, 28, 29) ~ 13,
      educ %in% c(17, 30) ~ 15,
      educ %in% c(18, 31, 32, 33) ~ 16,
      educ %in% c(19, 34) ~ 17,
      educ %in% c(20, 35) ~ 18,
      educ %in% c(76) ~ NA_real_,
      TRUE ~ educ))

eth_indiv_2015_sec2 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect2_hh_w3.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid),
    educ = ifelse(hh_s2q03 == 2,0,hh_s2q05)) %>% 
  transmute(
    year = 2015,
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    educ = case_when(
      educ %in% c(98) ~ 0,
      educ %in% c(93, 94, 95, 96) ~ 5,
      educ %in% c(9, 21) ~ 9,
      educ %in% c(10, 22) ~ 10,
      educ %in% c(11, 23) ~ 11,
      educ %in% c(12, 24) ~ 12,
      educ %in% c(13, 14, 15, 16, 25, 26, 27, 28, 29) ~ 13,
      educ %in% c(17, 30) ~ 15,
      educ %in% c(18, 31, 32, 33) ~ 16,
      educ %in% c(19, 34) ~ 17,
      educ %in% c(20, 35) ~ 18,
      TRUE ~ educ))

# Pooled
eth_indiv_pool_3educ <- do.call("rbind", list(eth_indiv_2011_sec2, eth_indiv_2013_sec2, eth_indiv_2015_sec2))
rm(eth_indiv_2011_sec2, eth_indiv_2013_sec2, eth_indiv_2015_sec2)

# Individual employment outcomes -----------------------------------------------

eth_indiv_2011_sec4 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2011", "sect4_hh_w1.dta"), 
  generate.factors = F, convert.factors = F) %>% 
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ea_id,
    hhid = paste0(ea, household_id),
    # sets to sector of out-of-household employment OR sector to farm or non-farm *non-temporary* 
    # work also within the household by job with most amount of hours in the past week 
    sect = case_when(
      hh_s4q09 != 1 & (hh_s4q04 > 20 & (hh_s4q04 >= hh_s4q05 | is.na(hh_s4q05))) ~ 1,
      hh_s4q09 != 1 & (hh_s4q05 > 20 & (hh_s4q05 > hh_s4q04 | is.na(hh_s4q04))) ~ 0,
      TRUE ~ as.numeric(as.character(hh_s4q11_b)))) %>% 
  transmute(
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    year = 2011,
    # sets as employed if employed outside the household OR if not employed 
    # outside the household but engaging in *non-temporary* farm or non-farm work, 
    # perhaps for a household business, in the past 7 days
    emp = ifelse(hh_s4q09 != 1 & (hh_s4q04 > 20 | hh_s4q05 > 20), 1 , hh_s4q09),

    # in some cases no sector was recorded - in this case we drop the observation from
    # emp_farm and emp_non_farm as we can't be sure.
    emp = as.numeric(emp == 1 & !is.na(emp)),
    emp_farm = ifelse(emp == 1 & sect == 1, 1, 0),
    emp_nonfarm = ifelse(emp == 1 & sect != 1, 1, 0))

eth_indiv_2013_sec4 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2013", "sect4_hh_w2.dta"), 
  generate.factors = F, convert.factors = F
  ) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2013) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid),
    # sets to sector of out-of-household employment OR sector to farm or non-farm *non-temporary* 
    # work also within the household by job with most amount of hours in the past week 
    sect = case_when(
      hh_s4q09 != 1 & (hh_s4q04 > 20 & (hh_s4q04 >= hh_s4q05 | is.na(hh_s4q05))) ~ 1,
      hh_s4q09 != 1 & (hh_s4q05 > 20 & (hh_s4q05 > hh_s4q04 | is.na(hh_s4q04))) ~ 0,
      TRUE ~ as.numeric(as.character(hh_s4q11_b)))) %>%
  transmute(
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    year = 2013,
    # sets as employed if employed outside the household OR if not employed 
    # outside the household but engaging in *non-temporary* farm or non-farm work, 
    # perhaps for a household business, in the past 7 days
    emp = ifelse(hh_s4q09 != 1 & (hh_s4q04 > 20 | hh_s4q05 > 20), 1 , hh_s4q09),

    # in some cases no sector was recorded - in this case we drop the observation from
    # emp_farm and emp_non_farm as we can't be sure.
    emp = as.numeric(emp == 1 & !is.na(emp)),
    emp_farm = ifelse(emp == 1 & sect == 1, 1, 0),
    emp_nonfarm = ifelse(emp == 1 & sect != 1, 1, 0))

eth_indiv_2015_sec4 <- readstata13::read.dta13(
  here::here("Data", "Ethiopia", "eth2015", "Household", "sect4_hh_w3.dta"), 
  generate.factors = F, convert.factors = F
  ) %>% 
  # remove split households
  filter(household_id2 %in% (eth_pool_1admin %>% filter(year == 2015) %>% pull(hhid2))) %>%  
  mutate(
    admin1 = saq01,
    admin2 = saq02,
    admin3 = saq03,
    rural = as.numeric(rural == 1),
    ea = ifelse(ea_id == "", ea_id2, ea_id),
    hhid = ifelse(household_id == "", household_id2, household_id),
    hhid = paste0(ea, hhid),
    # sets to sector of out-of-household employment OR sector to farm or non-farm *non-temporary* 
    # work also within the household by job with most amount of hours in the past week 
    sect = case_when(
      hh_s4q09 != 1 & (hh_s4q04 > 20 & (hh_s4q04 >= hh_s4q05 | is.na(hh_s4q05))) ~ 1,
      hh_s4q09 != 1 & (hh_s4q05 > 20 & (hh_s4q05 > hh_s4q04 | is.na(hh_s4q04))) ~ 0,
      TRUE ~ as.numeric(as.character(hh_s4q11_b)))) %>%
  transmute(
    hhid = hhid,
    indiv = paste0(hhid, "_", individual_id),
    year = 2015,
    # sets as employed if employed outside the household OR if not employed 
    # outside the household but engaging in *non-temporary* farm or non-farm work, 
    # perhaps for a household business, in the past 7 days
    emp = ifelse(hh_s4q09 != 1 & (hh_s4q04 > 20 | hh_s4q05 > 20), 1 , hh_s4q09),
    # in some cases no sector was recorded - in this case we drop the observation from
    # emp_farm and emp_non_farm as we can't be sure.
    emp = as.numeric(emp == 1 & !is.na(emp)),
    emp_farm = ifelse(emp == 1 & sect == 1, 1, 0),
    emp_nonfarm = ifelse(emp == 1 & sect != 1, 1, 0))

# Cross-section
eth_indiv_pool_4emp <- do.call("rbind", list(eth_indiv_2011_sec4, eth_indiv_2013_sec4, eth_indiv_2015_sec4))
rm(eth_indiv_2011_sec4, eth_indiv_2013_sec4, eth_indiv_2015_sec4)

# INDIVIDUAL LONGITUDINAL DATASET OUTPUT ---------------------------------------

eth_indiv_panel <- list(eth_indiv_pool_2dem, eth_indiv_pool_3educ, eth_indiv_pool_4emp) %>% 
  # The large amount of "rows only in Y" is due to not filtering these to be adults
  reduce(inner_join) %>% 
  ungroup() %>% 
  # Keep only those households in the main household dataset and merge
  inner_join(eth_hh_panel)

saveRDS(eth_indiv_panel, here::here("Data", "ethiopia_indivpanel.rds"))

